from datasets import Dataset
from datasets import load_dataset
# Path to the example JSONL data file.
data_path = "/data/r1_data_example.jsonl"

# Read the file with the generic "json" builder and keep only the first
# 2000 rows of the "train" split.
train_ds = load_dataset(
    path="json",
    data_files=data_path,
    split="train[:2000]",
)

# Show the dataset summary. Output recorded for the example file:
# Dataset({
#     features: ['instruction', 'question', 'think', 'answer', 'metrics'],
#     num_rows: 2000
# })
print(train_ds)
